Load packages

library(Seurat)
library(ggplot2)
library(cowplot)

Load previously saved datasets and add annotation

cdS1 <- readRDS("~/Documents/Experiments1/scRNAseq/Cerebellum_publication/allCells/data/Seurat3.rds")
cdS1@meta.data$dataset <- "E13"
colnames(cdS1@meta.data)[1:2] <- c("nCount_RNA","nFeature_RNA")

cdS2 <- readRDS("./E14/Seurat_E14.rds")
cdS3 <- readRDS("./E15/Seurat_E15.rds")
cdS4 <- readRDS("./E16/Seurat_E16.rds")

cdS1@meta.data$dataset1 <- "E13"
cdS2@meta.data$dataset1 <- "E14"
cdS3@meta.data$dataset1 <- "E15"
cdS4@meta.data$dataset1 <- "E16"

Dataset preprocessing

cdS <- merge(cdS1, c(cdS2, cdS3, cdS4))
cb.list <- SplitObject(object = cdS, split.by = "dataset1")

Normalize data and find variable genes

for (i in 1:length(x = cb.list)) {
  cb.list[[i]] <- NormalizeData(object = cb.list[[i]], verbose = FALSE)
  cb.list[[i]] <- FindVariableFeatures(object = cb.list[[i]], 
                                             selection.method = "vst", nfeatures = 2000, verbose = FALSE)
}

Integrate the deatasets

anchors <- FindIntegrationAnchors(object.list = cb.list, dims = 1:30)
cdS.i <- IntegrateData(anchorset = anchors, dims = 1:30)

switch to integrated assay

DefaultAssay(object = cdS.i) <- "integrated"

Run the standard workflow for visualization and clustering

cdS.i <- ScaleData(object = cdS.i, verbose = FALSE)
cdS.i <- RunPCA(object = cdS.i, npcs = 40, verbose = FALSE)
ElbowPlot(cdS.i, ndims = 40, reduction = "pca")

Find clusters

cdS.i <- FindNeighbors(cdS.i, reduction = "pca", assay = "integrated", dims = 1:23)
Computing nearest neighbor graph
Computing SNN
cdS.i <- FindClusters(cdS.i, modularity.fxn = 1,
                    resolution = 1, algorithm = 1, n.start = 10, n.iter = 10,
                    random.seed = 0, temp.file.location = NULL, edge.file.name = NULL,
                    verbose = F)

Inspect cell clusters

myplots <- list()
for (n in 1:length(res)){
  res1 <- paste("integrated_snn_res",res[n],sep=".")
  Idents(object = cdS.i) <- cdS.i@meta.data[,res1]
  myplots[[n]] <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)+
    NoAxes()+NoLegend()
}
myplots1 <- list()
for (n in 1:length(res)){
  res1 <- paste("integrated_snn_res",res[n],sep=".")
  Idents(object = cdS.i) <- cdS.i@meta.data[,res1]
  myplots1[[n]] <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
    NoAxes()+NoLegend()
}
gridExtra::grid.arrange(grobs= myplots, ncol = 2, top = paste0("resolutions: ", res))

gridExtra::grid.arrange(grobs= myplots1, ncol = 2, top = paste0("resolutions: ", res))

res1 <- paste("integrated_snn_res",2,sep=".")
Idents(object = cdS.i) <- cdS.i@meta.data[,res1]

Visualize the results with t-SNE

Visualize the results with UMAP

Visualize the results with UMAP

cdS.i <- RunUMAP(object = cdS.i, dims = 1:23, reduction = "pca",
                      features = NULL, assay = "integrated", nneighbors = 65L, max.dim = 2L,
                      min.dist = 0.4, reduction.name = "umap", reduction.key = "UMAP_",
                      metric = "correlation", seed.use = 42)
DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)
DimPlot(cdS.i, reduction = "umap", group.by = "cellType", label=T, split.by = "dataset1")+
  NoAxes()+NoLegend()

DimPlot(cdS.i, reduction = "tsne", group.by = "cellType", label=T, split.by = "dataset1")+
  NoAxes()+NoLegend()

p1 <- DimPlot(cdS.i, reduction = "tsne", group.by = "cellType", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()
p2 = DimPlot(cdS.i, reduction = "umap", group.by = "cellType", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()
p3 <- DimPlot(cdS.i, reduction = "tsne", group.by = "dataset1", label=F)+
  NoAxes()
p4 <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p5 = cowplot::plot_grid(p3,p4, rel_widths = c(1,0.8))
p6 <- DimPlot(cdS.i, reduction = "umap", group.by = "dataset1", label=F)+
  NoAxes()
p7 <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p8 = cowplot::plot_grid(p6,p7, rel_widths = c(1,0.8))
pdf("summary.pdf", h=15, w=10)
cowplot::plot_grid(p5,p1, ncol =1, rel_heights = c(0.9,2))
cowplot::plot_grid(p8,p2, ncol =1, rel_heights = c(0.9,2))
dev.off()
null device 
          1 
p1 <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T) +
  NoAxes()+NoLegend()
p2 <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p3 <- DimPlot(cdS.i, reduction = "umap", group.by = "dataset1", label=F)
p4 <- DimPlot(cdS.i, reduction = "umap", group.by = "cellType", label=T)+
  NoAxes()+NoLegend()
cowplot::plot_grid(p1,p2,p3,p4)

p5 <- DimPlot(cdS.i, reduction = "tsne", group.by = "cellType", repel = T, label=T, split.by = "dataset1") +
  NoAxes()+NoLegend()
p6 <- DimPlot(cdS.i, reduction = "umap", group.by = "cellType", repel = T, label=T, split.by = "dataset1")+
  NoAxes()+NoLegend()
cowplot::plot_grid(p5,p6, ncol =1)

Save the Seurat object

saveRDS(file = "Seurat_merge.rds",cdS.i)
mGenes <- readRDS(file="/Volumes/jali/Genome/Annotation1.rds")
sig_genes <- FindAllMarkers(cdS.i, features = NULL,
                            logfc.threshold = 0.25, test.use = "wilcox", min.pct = 0.1,
                            min.diff.pct = -Inf, verbose = F, only.pos = TRUE,
                            max.cells.per.ident = Inf, random.seed = 1, latent.vars = NULL,
                            min.cells.feature = 3, min.cells.group = 3, pseudocount.use = 1,
                            return.thresh = 0.01)

sig_genes <- mutate(sig_genes,pct.diff=pct.1-pct.2)

sig_genes$TF <- "no"
id <- intersect(mGenes$mgi_symbol,sig_genes$gene)

x <- sig_genes[sig_genes$gene %in% id,]
sig_genes$TF[sig_genes$gene %in% id] <- mGenes$Type[match(x$gene,mGenes$mgi_symbol)]
sig_genes$description <- mGenes$description[match(as.character(sig_genes$gene),mGenes$mgi_symbol)]
head(sig_genes)

sig_genes <- sig_genes[, c("gene","TF","description","p_val","p_val_adj","avg_logFC","pct.1","pct.2","pct.diff","cluster")]
openxlsx::write.xlsx(sig_genes,"sigGenes.xlsx")

top <- sig_genes %>% group_by(cluster) %>% top_n(5, avg_logFC);top
tab <- as.data.frame(table(Idents(object = cdS.i)))
top$cellNumber <- tab$Freq[match(top$cluster,tab$Var1)]
top$percentage <- round(top$cellNumber/ncol(cdS.i)*100, digits = 2)
openxlsx::write.xlsx(top,"sigGenes_top5.xlsx")

Annotate cell cluster

Annotate the cell culster in Excel.

top5 <- openxlsx::read.xlsx("sigGenes_sum.xlsx", sheet =2)
top5a <- top5[!duplicated(top5$cluster),]
top5a$cellType
 [1] "PC.Etv1"   "PC.Foxp1"  "PC.Nrgn"   "PC.En1"    "GABA.Pre"  "NPC.S"     "EGL.S"     "BG"        "EGL.S1"    "EGL"       "GC1"       "GABA.Pre1" "C1"        "IN"       
[15] "EGL.Pvalb" "GC2"       "EGL.M"     "NPC.M"     "Tlx3"      "BG"        "PC.Cck"    "GABA.Pro"  "IN"        "lCN"       "GABA.Pro"  "EGL.M1"    "mCN"       "NPC.M"    
[29] "lCN?"      "PC.Cdh9"   "AS"        "AS"        "NPC"       "Peri"      "Endo"      "MidO"      "Micro"     "Eryth"     "Isl1"     
cdS.i@meta.data$cellType1 <- plyr::mapvalues(x = Idents(cdS.i), from = top5a$cluster, to = top5a$cellType)
DimPlot(cdS.i, reduction = "tsne", group.by = "cellType1", label=T, repel = T)+
  NoLegend()

DimPlot(cdS.i, reduction = "umap", group.by = "cellType1", label=T, repel = T)+
  NoLegend()

table(cdS.i@meta.data$cellType1, cdS.i@meta.data$dataset1)
           
            E13 E14 E15 E16
  PC.Etv1   955 482 323  52
  PC.Foxp1  830 488 314  68
  PC.Nrgn   747 482 294  72
  PC.En1    823 326 244  51
  GABA.Pre  542 477 295 113
  NPC.S     457 306 283 173
  EGL.S     197 159 396 440
  BG        480 499 484 342
  EGL.S1    163 136 331 432
  EGL       133 139 365 420
  GC1       153 165 310 395
  GABA.Pre1 311 324 224 111
  C1        306 164 281  91
  IN        319 347 436 298
  EGL.Pvalb 152 150 238 240
  GC2       168  48 189 335
  EGL.M     120  97 232 271
  NPC.M     438 306 277 159
  Tlx3      171 338 140  22
  PC.Cck    298 150 145  20
  GABA.Pro  345 344 277 172
  lCN       138 271  94  32
  EGL.M1     84  58 167 214
  mCN       180 101 111 120
  lCN?      103 181  93  31
  PC.Cdh9   111  90 144  35
  AS        172 187 224 154
  NPC       254  41  48   4
  Peri       41  80  97 109
  Endo       36  63  90  48
  MidO       25  94  85  31
  Micro       4  37  57  54
  Eryth      37   7  22  55
  Isl1       13  57  25   7
p1 <- DimPlot(cdS.i, reduction = "tsne", group.by = "cellType1", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()
p2 = DimPlot(cdS.i, reduction = "umap", group.by = "cellType1", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()
p3 <- DimPlot(cdS.i, reduction = "tsne", group.by = "dataset1", label=F)+
  NoAxes()
p4 <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p5 = cowplot::plot_grid(p3,p4, rel_widths = c(1,0.8))
p6 <- DimPlot(cdS.i, reduction = "umap", group.by = "dataset1", label=F)+
  NoAxes()
p7 <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p8 = cowplot::plot_grid(p6,p7, rel_widths = c(1,0.8))
pdf("summary1.pdf", h=15, w=10)
cowplot::plot_grid(p5,p1, ncol =1, rel_heights = c(0.9,2))
cowplot::plot_grid(p8,p2, ncol =1, rel_heights = c(0.9,2))
dev.off()
null device 
          1 
---
title: "R Notebook"
output: html_notebook
---

# Load packages
```{r load-packages, message=F, warning=F}
library(Seurat)
library(ggplot2)
library(cowplot)
```


# Load previously saved datasets and add annotation
```{r}
cdS1 <- readRDS("~/Documents/Experiments1/scRNAseq/Cerebellum_publication/allCells/data/Seurat3.rds")
cdS1@meta.data$dataset <- "E13"
colnames(cdS1@meta.data)[1:2] <- c("nCount_RNA","nFeature_RNA")

cdS2 <- readRDS("./E14/Seurat_E14.rds")
cdS3 <- readRDS("./E15/Seurat_E15.rds")
cdS4 <- readRDS("./E16/Seurat_E16.rds")

cdS1@meta.data$dataset1 <- "E13"
cdS2@meta.data$dataset1 <- "E14"
cdS3@meta.data$dataset1 <- "E15"
cdS4@meta.data$dataset1 <- "E16"
```


# Dataset preprocessing
```{r}
cdS <- merge(cdS1, c(cdS2, cdS3, cdS4))
cb.list <- SplitObject(object = cdS, split.by = "dataset1")
```

# Normalize data and find variable genes
```{r}
for (i in 1:length(x = cb.list)) {
  cb.list[[i]] <- NormalizeData(object = cb.list[[i]], verbose = FALSE)
  cb.list[[i]] <- FindVariableFeatures(object = cb.list[[i]], 
                                             selection.method = "vst", nfeatures = 2000, verbose = FALSE)
}
```

# Integrate the deatasets
```{r}
anchors <- FindIntegrationAnchors(object.list = cb.list, dims = 1:30)
cdS.i <- IntegrateData(anchorset = anchors, dims = 1:30)
```

# switch to integrated assay
```{r}
DefaultAssay(object = cdS.i) <- "integrated"
```

# Run the standard workflow for visualization and clustering
```{r}
cdS.i <- ScaleData(object = cdS.i, verbose = FALSE)
cdS.i <- RunPCA(object = cdS.i, npcs = 40, verbose = FALSE)
ElbowPlot(cdS.i, ndims = 40, reduction = "pca")
```

# Find clusters
```{r}
cdS.i <- FindNeighbors(cdS.i, reduction = "pca", assay = "integrated", dims = 1:23)
```


```{r}
res <- seq(0.5,4.0,0.5)
cdS.i <- FindClusters(cdS.i, modularity.fxn = 1,
                    resolution = res, algorithm = 1, n.start = 10, n.iter = 10,
                    random.seed = 0, temp.file.location = NULL, edge.file.name = NULL,
                    verbose = F)
```

# Inspect cell clusters
```{r fig.width=4, fig.asp=2}
myplots <- list()
for (n in 1:length(res)){
  res1 <- paste("integrated_snn_res",res[n],sep=".")
  Idents(object = cdS.i) <- cdS.i@meta.data[,res1]
  myplots[[n]] <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)+
    NoAxes()+NoLegend()
}
myplots1 <- list()
for (n in 1:length(res)){
  res1 <- paste("integrated_snn_res",res[n],sep=".")
  Idents(object = cdS.i) <- cdS.i@meta.data[,res1]
  myplots1[[n]] <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
    NoAxes()+NoLegend()
}

gridExtra::grid.arrange(grobs= myplots, ncol = 2, top = paste0("resolutions: ", res))
gridExtra::grid.arrange(grobs= myplots1, ncol = 2, top = paste0("resolutions: ", res))
```

```{r}
res1 <- paste("integrated_snn_res",2,sep=".")
Idents(object = cdS.i) <- cdS.i@meta.data[,res1]
```


# Visualize the results with t-SNE
```{r}
cdS.i <- RunTSNE(cdS.i, reduction = "pca", cells = NULL,
               dims = 1:23, features = NULL, seed.use = 1, tsne.method = "Rtsne",
               add.iter = 0, dim.embed = 2, distance.matrix = NULL,
               reduction.name = "tsne", reduction.key = "tSNE_", perplexity = 50)
DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)
```

# Visualize the results with UMAP
```{r}
cdS.i <- RunUMAP(object = cdS.i, dims = 1:23, reduction = "pca",
                      features = NULL, assay = "integrated", nneighbors = 65L, max.dim = 2L,
                      min.dist = 0.4, reduction.name = "umap", reduction.key = "UMAP_",
                      metric = "correlation", seed.use = 42)
DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)
```

# Visualize the results with UMAP
```{r}
cdS.i <- RunUMAP(object = cdS.i, dims = 1:23, reduction = "pca",
                      features = NULL, assay = "integrated", nneighbors = 65L, max.dim = 2L,
                      min.dist = 0.4, reduction.name = "umap", reduction.key = "UMAP_",
                      metric = "correlation", seed.use = 42)
DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)
```

```{r fig.asp=1}
DimPlot(cdS.i, reduction = "umap", group.by = "cellType", label=T, split.by = "dataset1")+
  NoAxes()+NoLegend()
```

```{r fig.asp=1}
DimPlot(cdS.i, reduction = "tsne", group.by = "cellType", label=T, split.by = "dataset1")+
  NoAxes()+NoLegend()
```

```{r}
p1 <- DimPlot(cdS.i, reduction = "tsne", group.by = "cellType", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()
p2 = DimPlot(cdS.i, reduction = "umap", group.by = "cellType", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()



p3 <- DimPlot(cdS.i, reduction = "tsne", group.by = "dataset1", label=F)+
  NoAxes()
p4 <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p5 = cowplot::plot_grid(p3,p4, rel_widths = c(1,0.8))

p6 <- DimPlot(cdS.i, reduction = "umap", group.by = "dataset1", label=F)+
  NoAxes()
p7 <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p8 = cowplot::plot_grid(p6,p7, rel_widths = c(1,0.8))

```

```{r}
pdf("summary.pdf", h=15, w=10)
cowplot::plot_grid(p5,p1, ncol =1, rel_heights = c(0.9,2))
cowplot::plot_grid(p8,p2, ncol =1, rel_heights = c(0.9,2))
dev.off()
```



```{r}
p1 <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T) +
  NoAxes()+NoLegend()
p2 <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p3 <- DimPlot(cdS.i, reduction = "umap", group.by = "dataset1", label=F)
p4 <- DimPlot(cdS.i, reduction = "umap", group.by = "cellType", label=T)+
  NoAxes()+NoLegend()

cowplot::plot_grid(p1,p2,p3,p4)
```



```{r fig.width=8, fig.asp=1.5}
p5 <- DimPlot(cdS.i, reduction = "tsne", group.by = "cellType", repel = T, label=T, split.by = "dataset1") +
  NoAxes()+NoLegend()
p6 <- DimPlot(cdS.i, reduction = "umap", group.by = "cellType", repel = T, label=T, split.by = "dataset1")+
  NoAxes()+NoLegend()
cowplot::plot_grid(p5,p6, ncol =1)
```

# Save the Seurat object
```{r}
saveRDS(file = "Seurat_merge.rds",cdS.i)
```


```{r}
mGenes <- readRDS(file="/Volumes/jali/Genome/Annotation1.rds")
sig_genes <- FindAllMarkers(cdS.i, features = NULL,
                            logfc.threshold = 0.25, test.use = "wilcox", min.pct = 0.1,
                            min.diff.pct = -Inf, verbose = F, only.pos = TRUE,
                            max.cells.per.ident = Inf, random.seed = 1, latent.vars = NULL,
                            min.cells.feature = 3, min.cells.group = 3, pseudocount.use = 1,
                            return.thresh = 0.01)

sig_genes <- mutate(sig_genes,pct.diff=pct.1-pct.2)

sig_genes$TF <- "no"
id <- intersect(mGenes$mgi_symbol,sig_genes$gene)

x <- sig_genes[sig_genes$gene %in% id,]
sig_genes$TF[sig_genes$gene %in% id] <- mGenes$Type[match(x$gene,mGenes$mgi_symbol)]
sig_genes$description <- mGenes$description[match(as.character(sig_genes$gene),mGenes$mgi_symbol)]
head(sig_genes)

sig_genes <- sig_genes[, c("gene","TF","description","p_val","p_val_adj","avg_logFC","pct.1","pct.2","pct.diff","cluster")]
openxlsx::write.xlsx(sig_genes,"sigGenes.xlsx")

top <- sig_genes %>% group_by(cluster) %>% top_n(5, avg_logFC);top
tab <- as.data.frame(table(Idents(object = cdS.i)))
top$cellNumber <- tab$Freq[match(top$cluster,tab$Var1)]
top$percentage <- round(top$cellNumber/ncol(cdS.i)*100, digits = 2)
openxlsx::write.xlsx(top,"sigGenes_top5.xlsx")
```

# Annotate cell cluster
Annotate the cell culster in Excel.
```{r fig.width=5, fig.asp=1}
top5 <- openxlsx::read.xlsx("sigGenes_sum.xlsx", sheet =2)
top5a <- top5[!duplicated(top5$cluster),]
top5a$cellType

cdS.i@meta.data$cellType1 <- plyr::mapvalues(x = Idents(cdS.i), from = top5a$cluster, to = top5a$cellType)
DimPlot(cdS.i, reduction = "tsne", group.by = "cellType1", label=T, repel = T)+
  NoLegend()
DimPlot(cdS.i, reduction = "umap", group.by = "cellType1", label=T, repel = T)+
  NoLegend()

```

```{r}
table(cdS.i@meta.data$cellType1, cdS.i@meta.data$dataset1)
```


```{r}
p1 <- DimPlot(cdS.i, reduction = "tsne", group.by = "cellType1", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()
p2 = DimPlot(cdS.i, reduction = "umap", group.by = "cellType1", label=T, split.by = "dataset1", repel = T)+
  NoAxes()+NoLegend()

p3 <- DimPlot(cdS.i, reduction = "tsne", group.by = "dataset1", label=F)+
  NoAxes()
p4 <- DimPlot(cdS.i, reduction = "tsne", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p5 = cowplot::plot_grid(p3,p4, rel_widths = c(1,0.8))

p6 <- DimPlot(cdS.i, reduction = "umap", group.by = "dataset1", label=F)+
  NoAxes()
p7 <- DimPlot(cdS.i, reduction = "umap", group.by = "ident", label=T)+
  NoAxes()+NoLegend()
p8 = cowplot::plot_grid(p6,p7, rel_widths = c(1,0.8))

```

```{r}
pdf("summary1.pdf", h=15, w=10)
cowplot::plot_grid(p5,p1, ncol =1, rel_heights = c(0.9,2))
cowplot::plot_grid(p8,p2, ncol =1, rel_heights = c(0.9,2))
dev.off()
```

# Print sesstion information
```{r}
sessionInfo()
```

